/* aes_dec-asm.S */
/*
    This file is part of the AVR-Crypto-Lib.
    Copyright (C) 2008, 2009  Daniel Otte (daniel.otte@rub.de)

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
	    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
/**
 * \file     aes_dec-asm.S
 * \email    daniel.otte@rub.de
 * \author   Daniel Otte 
 * \date     2009-01-10
 * \license  GPLv3 or later
 * 
 */

#include "avr-asm-macros.S"
/* File-level register-number aliases (values are AVR register numbers). */
.equ A, 28         /* r28 - not referenced in the visible code */
.equ B, 29         /* r29 - not referenced in the visible code */
.equ P, 0          /* r0  - receives lpm results during column mixing */
.equ xREDUCER, 25  /* r25 - loaded with 0x1b and xor-ed in after a shifted-out carry */

/*
 * Key-size specific entry points.
 *
 * Each one only loads the number of rounds into r20 and then continues in
 * aes_decrypt_core; r24:r25 and r22:r23 are passed through untouched.
 */
AES256_DEC_ROUNDS = 14
AES192_DEC_ROUNDS = 12
AES128_DEC_ROUNDS = 10

.global aes256_dec
aes256_dec:
	ldi  r20, AES256_DEC_ROUNDS
	rjmp aes_decrypt_core

.global aes192_dec
aes192_dec:
	ldi  r20, AES192_DEC_ROUNDS
	rjmp aes_decrypt_core

.global aes128_dec
aes128_dec:
	ldi  r20, AES128_DEC_ROUNDS
	/* no jump here: execution falls through into aes_decrypt_core, */
	/* so no instruction may be placed between this stub and that label */


/*
  void aes_decrypt_core(aes_cipher_state_t *state, const aes_genctx_t *ks, uint8_t rounds)
*/
/* Register allocation used by aes_decrypt_core (values are register numbers). */

/* scratch registers */
.equ T0, 2
.equ T1, 3
.equ T2, 4
.equ T3, 5
.equ T4, 6
.equ T5, 7      /* not referenced in the visible code */

/* cipher state: ST<c><r> holds state byte 4*c + r (loaded in memory order) */
.equ ST00,  8
.equ ST01,  9
.equ ST02, 10
.equ ST03, 11
.equ ST10, 12
.equ ST11, 13
.equ ST12, 14
.equ ST13, 15
.equ ST20, 16
.equ ST21, 17
.equ ST22, 18
.equ ST23, 19
.equ ST30, 20
.equ ST31, 21
.equ ST32, 22
.equ ST33, 23

/* remaining-round counter */
.equ CTR, 24
/*
 * aes_dec_inv_mix_column s0, s1, s2, s3
 *
 * InvMixColumns on one state column, in place. s0..s3 are the registers
 * holding bytes 0..3 of that column.
 *
 * With t = s3^s2 and u = s0^s1 (all products in GF(2^8), reducer 0x1b):
 *   v9 = 9*(t^u)            (lut_gf256mul_0x09)
 *   w  = v9 ^ 4*(s2^s0)     (lut_gf256mul_0x04)
 *   v  = v9 ^ 4*(s3^s1)
 *   s3 ^= 2*(s0^s3) ^ v
 *   s1 ^= 2*(s2^s1) ^ v
 *   s2 ^= 2*t ^ w
 *   s0 ^= 2*u ^ w
 * which expands to the 0e/0b/0d/09 InvMixColumns matrix.
 *
 * Expects xREDUCER to hold 0x1b. Clobbers T0..T4, P (r0), r30, r31 and the
 * arithmetic flags; the T flag is not touched.
 */
.macro aes_dec_inv_mix_column s0, s1, s2, s3
	/* preparing */
	ldi r31, hi8(lut_gf256mul_0x09)
	mov T0, \s3
	eor T0, \s2             /* T0 = t */
	mov T1, \s0
	eor T1, \s1             /* T1 = u */
	mov r30, T0
	eor r30, T1
	lpm T2, Z               /* T2 = v9 = 9*(t^u) */

	ldi r31, hi8(lut_gf256mul_0x04)
	mov r30, \s2
	eor r30, \s0
	lpm T3, Z
	eor T3, T2              /* T3 = w */

	mov r30, \s3
	eor r30, \s1
	lpm P, Z
	eor T2, P               /* T2 = v */

	/* s3 ^= 2*(s0^s3) ^ v   (uses s0 and s3 before either is updated) */
	mov T4, \s0
	eor T4, \s3
	lsl T4
	brcc 3f
	eor T4, xREDUCER
3:	eor T4, T2
	eor \s3, T4

	/* s1 ^= 2*(s2^s1) ^ v */
	mov T4, \s2
	eor T4, \s1
	lsl T4
	brcc 3f
	eor T4, xREDUCER
3:	eor T4, T2
	eor \s1, T4

	/* s2 ^= 2*t ^ w */
	lsl T0
	brcc 3f
	eor T0, xREDUCER
3:	eor T0, T3
	eor \s2, T0

	/* s0 ^= 2*u ^ w */
	lsl T1
	brcc 3f
	eor T1, xREDUCER
3:	eor T1, T3
	eor \s0, T1
.endm

/*
 * aes_decrypt_core - decrypt one 16-byte block in place.
 *
 * param state:  r24:r25  16 bytes, read at entry and overwritten at exit
 * param ks:     r22:r23  round keys, 16 bytes each; (rounds+1)*16 bytes are
 *                        read, walking backwards from ks + (rounds+1)*16
 * param rounds: r20      number of rounds
 *
 * Preconditions visible in the code:
 *  - 1 <= rounds <= 14: the key offset is formed with "swap", which equals a
 *    multiplication by 16 only while rounds+1 fits into four bits, and a
 *    round count of 0 would wrap the counter.
 *  - r1 is added as the carry-only operand, i.e. it is assumed to be zero
 *    (avr-gcc zero-register convention - TODO confirm for non-C callers).
 *  - aes_invsbox (defined elsewhere) and both lut_gf256mul_* tables are
 *    addressed by loading only hi8() into r31 and the index into r30, so
 *    each of them must start on a 256-byte boundary.
 *
 * Registers: r2..r17 are saved/restored via push_range/pop_range (macros
 * from avr-asm-macros.S, presumably pushing/popping the given range).
 * r0, r18..r27, r30, r31 and the T flag are clobbered. r28/r29 are never
 * written by this routine and therefore need no save/restore.
 */
.global aes_decrypt_core
aes_decrypt_core:
	push_range 2, 17
	push r24                /* keep the state pointer for the write-back at exit */
	push r25
	movw r26, r22           /* X = ks */
	movw r30, r24           /* Z = state */
	mov  CTR, r20
	inc r20
	swap r20 /* r20*16 */
	add r26, r20            /* X = ks + (rounds+1)*16, one past the last round key */
	adc r27, r1
	clt                     /* T flag = "this is the last round" */
	.irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
		ld \param, Z+
	.endr

	ldi xREDUCER, 0x1b /* load reducer */

	/* initial key addition; X pre-decrements, so the state is walked last-to-first */
	.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
		ld r0, -X
		eor \param, r0
	.endr
1:
	dec CTR
	brne 2f
	set                     /* counter reached zero: mark the last round */
2:
	ldi r31, hi8(aes_invsbox)
	/* substitute and invShift */
	/* byte 0 of every column: substituted in place */
	.irp param, ST00, ST10, ST20, ST30
		mov r30, \param
		lpm \param, Z
	.endr
	/* byte 1 of every column: each column takes the value of the previous one */
	mov r30, ST31
	lpm T0, Z
	mov r30, ST21
	lpm ST31, Z
	mov r30, ST11
	lpm ST21, Z
	mov r30, ST01
	lpm ST11, Z
	mov ST01, T0

	/* byte 2 of every column: columns 0<->2 and 1<->3 are exchanged */
	mov r30, ST32
	lpm T0, Z
	mov r30, ST22
	lpm T1,Z
	mov r30, ST12
	lpm ST32, Z
	mov r30, ST02
	lpm ST22, Z
	mov ST12, T0
	mov ST02, T1

	/* byte 3 of every column: each column takes the value of the next one */
	mov r30, ST03
	lpm T0, Z
	mov r30, ST13
	lpm ST03, Z
	mov r30, ST23
	lpm ST13, Z
	mov r30, ST33
	lpm ST23, Z
	mov ST33, T0

	/* key addition */
	.irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
		ld r0, -X
		eor \param, r0
	.endr
	brtc 2f                 /* not the last round: go on with column mixing */
exit:
	pop r31                 /* Z = state pointer saved at entry (high byte first) */
	pop r30
	.irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
		st Z+, \param
	.endr
	pop_range 2, 17
	ret
2:
	/* inverse column mixing, one column at a time */
	aes_dec_inv_mix_column ST00, ST01, ST02, ST03
	aes_dec_inv_mix_column ST10, ST11, ST12, ST13
	aes_dec_inv_mix_column ST20, ST21, ST22, ST23
	aes_dec_inv_mix_column ST30, ST31, ST32, ST33

	rjmp 1b

/*
 * Lookup tables for the inverse column mixing in aes_decrypt_core.
 *
 * The code addresses a table by loading only hi8(table) into r31 and the
 * byte index into r30, so every table has to start on a 256-byte boundary.
 */
.balign 256

/* lut_gf256mul_0x09[x] = 9*x in GF(2^8), reduction constant 0x1b; 256 entries */
lut_gf256mul_0x09:
.byte	0x00, 0x09, 0x12, 0x1B, 0x24, 0x2D, 0x36, 0x3F
.byte	0x48, 0x41, 0x5A, 0x53, 0x6C, 0x65, 0x7E, 0x77
.byte	0x90, 0x99, 0x82, 0x8B, 0xB4, 0xBD, 0xA6, 0xAF
.byte	0xD8, 0xD1, 0xCA, 0xC3, 0xFC, 0xF5, 0xEE, 0xE7
.byte	0x3B, 0x32, 0x29, 0x20, 0x1F, 0x16, 0x0D, 0x04
.byte	0x73, 0x7A, 0x61, 0x68, 0x57, 0x5E, 0x45, 0x4C
.byte	0xAB, 0xA2, 0xB9, 0xB0, 0x8F, 0x86, 0x9D, 0x94
.byte	0xE3, 0xEA, 0xF1, 0xF8, 0xC7, 0xCE, 0xD5, 0xDC
.byte	0x76, 0x7F, 0x64, 0x6D, 0x52, 0x5B, 0x40, 0x49
.byte	0x3E, 0x37, 0x2C, 0x25, 0x1A, 0x13, 0x08, 0x01
.byte	0xE6, 0xEF, 0xF4, 0xFD, 0xC2, 0xCB, 0xD0, 0xD9
.byte	0xAE, 0xA7, 0xBC, 0xB5, 0x8A, 0x83, 0x98, 0x91
.byte	0x4D, 0x44, 0x5F, 0x56, 0x69, 0x60, 0x7B, 0x72
.byte	0x05, 0x0C, 0x17, 0x1E, 0x21, 0x28, 0x33, 0x3A
.byte	0xDD, 0xD4, 0xCF, 0xC6, 0xF9, 0xF0, 0xEB, 0xE2
.byte	0x95, 0x9C, 0x87, 0x8E, 0xB1, 0xB8, 0xA3, 0xAA
.byte	0xEC, 0xE5, 0xFE, 0xF7, 0xC8, 0xC1, 0xDA, 0xD3
.byte	0xA4, 0xAD, 0xB6, 0xBF, 0x80, 0x89, 0x92, 0x9B
.byte	0x7C, 0x75, 0x6E, 0x67, 0x58, 0x51, 0x4A, 0x43
.byte	0x34, 0x3D, 0x26, 0x2F, 0x10, 0x19, 0x02, 0x0B
.byte	0xD7, 0xDE, 0xC5, 0xCC, 0xF3, 0xFA, 0xE1, 0xE8
.byte	0x9F, 0x96, 0x8D, 0x84, 0xBB, 0xB2, 0xA9, 0xA0
.byte	0x47, 0x4E, 0x55, 0x5C, 0x63, 0x6A, 0x71, 0x78
.byte	0x0F, 0x06, 0x1D, 0x14, 0x2B, 0x22, 0x39, 0x30
.byte	0x9A, 0x93, 0x88, 0x81, 0xBE, 0xB7, 0xAC, 0xA5
.byte	0xD2, 0xDB, 0xC0, 0xC9, 0xF6, 0xFF, 0xE4, 0xED
.byte	0x0A, 0x03, 0x18, 0x11, 0x2E, 0x27, 0x3C, 0x35
.byte	0x42, 0x4B, 0x50, 0x59, 0x66, 0x6F, 0x74, 0x7D
.byte	0xA1, 0xA8, 0xB3, 0xBA, 0x85, 0x8C, 0x97, 0x9E
.byte	0xE9, 0xE0, 0xFB, 0xF2, 0xCD, 0xC4, 0xDF, 0xD6
.byte	0x31, 0x38, 0x23, 0x2A, 0x15, 0x1C, 0x07, 0x0E
.byte	0x79, 0x70, 0x6B, 0x62, 0x5D, 0x54, 0x4F, 0x46 

/*
 * lut_gf256mul_0x04[x] = 4*x in GF(2^8), reduction constant 0x1b; 256 entries.
 *
 * The table is addressed through hi8(lut_gf256mul_0x04) in r31 plus the byte
 * index in r30, so it must start on a 256-byte boundary. The explicit
 * alignment makes that independent of the size of whatever precedes it; it
 * inserts no padding when the location counter is already aligned.
 */
.balign 256
lut_gf256mul_0x04:
.byte	0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C
.byte	0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C
.byte	0x40, 0x44, 0x48, 0x4C, 0x50, 0x54, 0x58, 0x5C
.byte	0x60, 0x64, 0x68, 0x6C, 0x70, 0x74, 0x78, 0x7C
.byte	0x80, 0x84, 0x88, 0x8C, 0x90, 0x94, 0x98, 0x9C
.byte	0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC
.byte	0xC0, 0xC4, 0xC8, 0xCC, 0xD0, 0xD4, 0xD8, 0xDC
.byte	0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC
.byte	0x1B, 0x1F, 0x13, 0x17, 0x0B, 0x0F, 0x03, 0x07
.byte	0x3B, 0x3F, 0x33, 0x37, 0x2B, 0x2F, 0x23, 0x27
.byte	0x5B, 0x5F, 0x53, 0x57, 0x4B, 0x4F, 0x43, 0x47
.byte	0x7B, 0x7F, 0x73, 0x77, 0x6B, 0x6F, 0x63, 0x67
.byte	0x9B, 0x9F, 0x93, 0x97, 0x8B, 0x8F, 0x83, 0x87
.byte	0xBB, 0xBF, 0xB3, 0xB7, 0xAB, 0xAF, 0xA3, 0xA7
.byte	0xDB, 0xDF, 0xD3, 0xD7, 0xCB, 0xCF, 0xC3, 0xC7
.byte	0xFB, 0xFF, 0xF3, 0xF7, 0xEB, 0xEF, 0xE3, 0xE7
.byte	0x36, 0x32, 0x3E, 0x3A, 0x26, 0x22, 0x2E, 0x2A
.byte	0x16, 0x12, 0x1E, 0x1A, 0x06, 0x02, 0x0E, 0x0A
.byte	0x76, 0x72, 0x7E, 0x7A, 0x66, 0x62, 0x6E, 0x6A
.byte	0x56, 0x52, 0x5E, 0x5A, 0x46, 0x42, 0x4E, 0x4A
.byte	0xB6, 0xB2, 0xBE, 0xBA, 0xA6, 0xA2, 0xAE, 0xAA
.byte	0x96, 0x92, 0x9E, 0x9A, 0x86, 0x82, 0x8E, 0x8A
.byte	0xF6, 0xF2, 0xFE, 0xFA, 0xE6, 0xE2, 0xEE, 0xEA
.byte	0xD6, 0xD2, 0xDE, 0xDA, 0xC6, 0xC2, 0xCE, 0xCA
.byte	0x2D, 0x29, 0x25, 0x21, 0x3D, 0x39, 0x35, 0x31
.byte	0x0D, 0x09, 0x05, 0x01, 0x1D, 0x19, 0x15, 0x11
.byte	0x6D, 0x69, 0x65, 0x61, 0x7D, 0x79, 0x75, 0x71
.byte	0x4D, 0x49, 0x45, 0x41, 0x5D, 0x59, 0x55, 0x51
.byte	0xAD, 0xA9, 0xA5, 0xA1, 0xBD, 0xB9, 0xB5, 0xB1
.byte	0x8D, 0x89, 0x85, 0x81, 0x9D, 0x99, 0x95, 0x91
.byte	0xED, 0xE9, 0xE5, 0xE1, 0xFD, 0xF9, 0xF5, 0xF1
.byte	0xCD, 0xC9, 0xC5, 0xC1, 0xDD, 0xD9, 0xD5, 0xD1

